library(dtwclust)
library(dtw)
df<-read.csv("../clean_data/gap_list_year.csv")
df
df <- subset(df, select = -c(X) )
df
#colnames(df)[colnames(df) == "X2000"] = "2000"
colnames(df)[colnames(df) == "X2003"] = "2003"
colnames(df)[colnames(df) == "X2005"] = "2005"
colnames(df)[colnames(df) == "X2007"] = "2007"
colnames(df)[colnames(df) == "X2009"] = "2009"
colnames(df)[colnames(df) == "X2011"] = "2011"
colnames(df)[colnames(df) == "X2013"] = "2013"
colnames(df)[colnames(df) == "X2015"] = "2015"
colnames(df)[colnames(df) == "X2017"] = "2017"
colnames(df)[colnames(df) == "X2019"] = "2019"
colnames(df)[colnames(df) == "X2022"] = "2022"
df
jurisdiction = df[['Jurisdiction']]
dtw_df <- df[, -1]
dtw_df
df_lst <- tslist(dtw_df)
remove_nan <- function(ts) {
ts[!is.na(ts)]
}
# Apply the function to each time series in the list
df_lst <- lapply(df_lst, remove_nan)
head(df_lst)
$`1`
[1] 0 0 1 0 -1 1 0 0 -1 5
$`2`
[1] 4 1 1 2 2 1 3 0 1 -4
$`3`
[1] 4 6 3 0 3 0 3 3 5 6
$`4`
[1] -2 1 1 3 0 -1 0 2 3 3
$`5`
[1] 4 2 2 2 1 3 3 2 3 7
$`6`
[1] 4 3 3 2 3 2 3 -1 6 6
df_cvi <- list()
for (i in 2:10){
df_clust <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw_basic", centroid = "pam")
df_metric <- cvi(df_clust, type = "valid", log.base = 10)
df_cvi <- append(df_cvi, list(df_metric))
}
df_cvi_ma <- do.call(rbind, df_cvi)
rw <- c("K2","K3","K4","K5","K6","K7","K8","K9","K10")
rownames(df_cvi_ma) <- rw
print(df_cvi_ma)
Sil SF CH DB DBstar D COP
K2 0.19695280 2.747998e-09 13.971487 2.108494 2.108494 0.11428571 0.5074697
K3 -0.02290605 6.969980e-13 1.627119 4.733586 5.263889 0.07317073 0.5586390
K4 0.05245045 5.773160e-15 6.888889 3.069093 3.571085 0.13333333 0.3582748
K5 0.09521117 0.000000e+00 6.091584 2.295930 2.394086 0.16666667 0.3421717
K6 0.06723006 5.528911e-14 8.159091 1.186120 1.200318 0.16000000 0.3343282
K7 0.04354251 0.000000e+00 6.305481 1.874516 2.115908 0.16666667 0.3012377
K8 0.06272406 0.000000e+00 6.592334 1.319638 1.413636 0.16000000 0.3015974
K9 0.01827154 0.000000e+00 4.165761 1.697016 1.788215 0.12121212 0.3086793
K10 0.02299449 0.000000e+00 4.811637 1.458277 1.695250 0.13333333 0.2643559
– “Sil” (!): Silhouette index (Rousseeuw (1987); to be maximized).-K4 – “SF” (~): Score Function (Saitta et al. (2007); to be maximized; see notes). – “CH” (~): Calinski-Harabasz index (Arbelaitz et al. (2013); to be maximized).-k3 – “DB” (?): Davies-Bouldin index (Arbelaitz et al. (2013); to be minimized).k4 – “DBstar” (?): Modified Davies-Bouldin index (DB*) (Kim and Ramakrishna (2005); to be minimized). -k4 – “D” (!): Dunn index (Arbelaitz et al. (2013); to be maximized). k5 – “COP” (!): COP index (Arbelaitz et al. (2013); to be minimized). k9
# different seeds
df_cvi2 <- list()
for (i in 1:100){
df_clust2 <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw_basic", centroid = "pam", seed=i)
df_metric2 <- cvi(df_clust2, type = "valid", log.base = 10)
df_cvi2 <- append(df_cvi2, list(df_metric2))
}
df_cvi_ma2 <- do.call(rbind, df_cvi2)
rw2 <- as.character(seq(1, 100))
rownames(df_cvi_ma2) <- rw2
print(df_cvi_ma2)
Sil SF CH DB DBstar D COP
1 0.039531231 6.661338e-16 6.575688 2.011872 2.198338 0.11764706 0.3797562
2 0.096560191 1.443290e-14 12.124320 2.130396 2.456827 0.14285714 0.3985609
3 0.096098075 9.769963e-15 11.536364 2.185333 2.341991 0.13333333 0.3950680
4 0.085772628 1.084488e-11 6.872642 2.128427 2.291982 0.15384615 0.3571650
5 0.081195794 4.085621e-14 8.285256 2.231410 2.458980 0.12500000 0.3698694
6 0.092224842 1.665335e-14 9.801127 2.051479 2.142135 0.12121212 0.3572774
7 0.093685583 2.309264e-14 9.182616 1.978579 1.984754 0.12121212 0.3715896
8 0.100228613 1.021405e-14 11.400458 2.097921 2.215728 0.08571429 0.3885335
9 0.045327099 2.353673e-14 9.077882 2.687935 3.142600 0.12903226 0.3818602
10 0.123961843 3.108624e-15 6.761021 3.709804 4.182907 0.14285714 0.3665954
11 0.099296687 3.108624e-15 10.228352 1.818100 1.894257 0.15384615 0.3644717
12 0.111892510 1.929568e-13 8.863524 2.434562 2.466947 0.11764706 0.3570287
13 0.059856755 3.130829e-14 9.641026 2.069612 2.287603 0.12903226 0.3547075
14 0.088114684 3.108624e-15 9.979452 2.899726 3.142572 0.14285714 0.3876581
15 0.092165584 1.466383e-12 7.909385 2.421452 2.473995 0.11764706 0.3681672
16 0.099544728 1.265654e-14 14.113803 1.927401 2.046487 0.09090909 0.4024175
17 0.080685908 5.107026e-15 11.032864 2.214818 2.426149 0.15384615 0.3563187
18 0.098502514 4.787748e-11 6.890432 1.713426 1.713426 0.14285714 0.3700740
19 0.060619475 3.175238e-14 9.035659 1.798700 1.850234 0.11428571 0.3727612
20 0.040074364 1.509903e-14 9.632637 1.783688 1.834691 0.11428571 0.3710492
21 0.138942647 3.552714e-15 6.216931 1.970950 2.033097 0.18518519 0.4449575
22 0.057423081 1.554312e-15 8.206349 2.263055 2.456323 0.12500000 0.3876282
23 0.014786718 0.000000e+00 6.246408 4.078886 4.444957 0.09756098 0.4058384
24 0.065037131 1.920131e-11 7.255760 1.976575 2.093944 0.09375000 0.3684708
25 0.078464897 8.659740e-15 9.091926 2.795879 3.275223 0.14285714 0.3432126
26 0.144922631 1.199041e-14 8.877778 1.996296 2.050926 0.12500000 0.3620229
27 0.008047798 2.220446e-16 6.307805 3.359145 4.131466 0.11764706 0.3875272
28 0.038132780 4.218847e-15 9.834802 2.433551 2.861002 0.11428571 0.4109847
29 0.100981386 6.439294e-15 9.191607 3.393723 3.935498 0.14285714 0.3591597
30 0.064880895 2.540190e-12 7.225425 2.074411 2.273689 0.12500000 0.3832830
31 -0.004992945 3.330669e-15 7.027717 2.214143 2.530649 0.11428571 0.3879082
32 0.081753414 1.776357e-15 7.225098 2.805950 3.223840 0.11764706 0.3746377
33 0.113156828 7.828627e-12 14.792561 1.572387 1.672168 0.24000000 0.3941288
34 0.083112863 6.661338e-15 6.914943 2.805727 2.856277 0.15384615 0.3726808
35 0.067187785 1.332268e-15 8.047945 2.309055 2.591089 0.13333333 0.3723106
36 0.089734671 5.107026e-14 10.778051 1.752507 1.767617 0.15151515 0.3500029
37 0.035551955 4.440892e-16 9.112603 2.278873 2.491135 0.12903226 0.4136193
38 0.101053178 2.486900e-14 8.154217 3.349469 3.368668 0.11764706 0.3457049
39 0.088195117 9.769963e-15 8.893903 1.980472 2.144215 0.15384615 0.3600481
40 0.075472711 3.020495e-11 11.221205 1.546289 1.815458 0.11428571 0.3859232
41 0.032008742 6.128209e-12 10.643498 2.099506 2.342299 0.09677419 0.3999147
42 0.101570984 6.323231e-11 9.454023 1.520933 1.557830 0.16666667 0.3487352
43 0.015454307 1.127773e-10 5.103043 1.317100 1.319236 0.09756098 0.5786549
44 0.083835023 1.088019e-14 8.798578 1.840936 1.975935 0.16666667 0.3610943
45 0.102443209 1.787459e-13 10.405761 1.815026 1.978974 0.16666667 0.3482702
46 0.124441915 4.618528e-14 11.140741 2.552941 2.744853 0.12903226 0.3409367
47 0.094373722 3.264056e-14 10.542056 2.058126 2.262796 0.12500000 0.3785597
48 0.010444151 4.340972e-13 7.640567 1.474969 1.529598 0.11764706 0.4197788
49 0.114197996 3.952394e-14 9.609387 1.478604 1.703049 0.15384615 0.3655778
50 0.109931881 2.220446e-15 13.568129 1.694877 1.767641 0.13793103 0.3621279
51 0.056848434 2.708944e-13 12.441176 2.066136 2.454689 0.14285714 0.3880163
52 0.069220718 6.661338e-15 9.032695 2.776830 3.175763 0.12903226 0.3536509
53 0.097560894 4.130030e-14 10.289712 2.566033 2.735920 0.12903226 0.3450232
54 0.099788892 5.084821e-14 10.654735 2.396538 2.469003 0.15384615 0.4183237
55 0.096917516 7.105427e-15 9.950450 2.400816 2.686944 0.14285714 0.3796109
56 0.084514098 3.619327e-14 13.535402 2.223220 2.283204 0.11764706 0.3661224
57 0.109475971 3.419487e-14 8.151154 1.865339 1.868131 0.12121212 0.3711694
58 0.102664816 6.661338e-15 10.313591 1.656197 1.677163 0.16000000 0.3730295
59 0.077923948 2.264855e-14 8.410135 2.033472 2.124388 0.11764706 0.3768834
60 0.107180563 2.353673e-14 9.866987 2.506863 2.579248 0.11764706 0.3544626
61 0.041173101 1.110223e-15 7.453426 2.086117 2.193811 0.11764706 0.3778805
62 0.086134434 1.021405e-14 8.555556 2.307887 2.669841 0.10714286 0.3593110
63 0.060271356 1.970046e-11 8.558642 2.448487 2.975473 0.12903226 0.3927275
64 0.069746948 1.709743e-14 10.515015 2.257986 2.389583 0.14285714 0.3802575
65 0.069634217 1.678657e-13 7.105832 2.783036 3.491826 0.12121212 0.3967087
66 0.056653533 9.559020e-12 6.838337 1.582083 1.642500 0.11764706 0.3960483
67 0.078191746 7.105427e-15 15.900997 1.634448 1.641477 0.08571429 0.4162976
68 0.090366480 1.110223e-14 8.058214 2.934425 3.720585 0.14285714 0.3534507
69 0.104822337 2.442491e-15 11.232019 2.089313 2.439475 0.14285714 0.3730069
70 0.084675415 2.811085e-13 9.523589 2.668497 3.108448 0.12903226 0.3823193
71 0.039681404 9.325873e-15 12.216479 2.512923 2.844381 0.10714286 0.4037361
72 0.028193907 4.884981e-15 13.733766 1.803593 1.926705 0.10000000 0.4038208
73 0.080986451 2.797762e-14 7.385714 1.856692 1.924558 0.11764706 0.3831373
74 0.105898369 5.329071e-14 9.626036 2.731924 2.992847 0.12903226 0.3364900
75 0.083081089 8.881784e-14 9.548093 2.630437 2.975959 0.12903226 0.3420759
76 0.076261486 4.440892e-16 11.239882 2.161748 2.277633 0.12903226 0.3928944
77 0.127143408 1.335954e-11 7.370370 1.434088 1.479906 0.15384615 0.3609324
78 0.076043063 8.149037e-14 7.272517 1.618177 1.724934 0.20588235 0.4637090
79 0.098616560 2.109424e-14 9.302083 2.638190 2.672862 0.12903226 0.3630090
80 0.091065079 2.797762e-14 7.419157 1.850260 1.905915 0.12500000 0.3933967
81 0.094957127 5.953171e-11 14.481481 1.754658 1.801954 0.16666667 0.4037368
82 0.063731499 2.220446e-15 8.164319 2.908766 3.486941 0.12903226 0.3780027
83 0.077883036 8.881784e-15 8.296296 2.302206 2.692904 0.16666667 0.3506723
84 0.082571062 1.741991e-10 8.805353 1.536338 1.673044 0.12121212 0.3627451
85 0.023928025 0.000000e+00 7.554158 2.283024 2.520053 0.11428571 0.4239070
86 0.083081089 8.881784e-14 9.548093 2.630437 2.975959 0.12903226 0.3420759
87 0.079058610 1.260769e-12 10.098856 1.677326 1.690554 0.12903226 0.3560816
88 0.057112801 1.110223e-15 6.337079 2.202474 2.411806 0.11764706 0.4611155
89 0.112501052 2.442491e-15 12.706605 1.672196 1.823476 0.13793103 0.3612112
90 0.111892510 1.929568e-13 8.863524 2.434562 2.466947 0.11764706 0.3570287
91 0.092705240 3.034462e-12 10.303303 1.616518 1.629539 0.12903226 0.3938009
92 0.077834448 4.440892e-16 8.162773 2.192290 2.616814 0.14285714 0.3627279
93 0.115265939 5.773160e-15 9.976415 1.423864 1.445073 0.12903226 0.3737607
94 0.094108311 5.329071e-15 12.020737 2.107586 2.351056 0.14814815 0.3642254
95 0.124639411 3.170797e-12 9.242976 1.656167 1.865074 0.14285714 0.3796404
96 0.084035397 2.442491e-15 10.456674 1.951200 2.159656 0.14285714 0.3558458
97 0.089741152 2.953193e-14 14.289786 2.147438 2.246257 0.13333333 0.3484366
98 0.102097133 1.110223e-15 11.644144 2.555440 2.697917 0.14285714 0.3864794
99 0.059789202 4.440892e-16 6.130435 2.268899 2.411458 0.11764706 0.3972490
100 0.125187002 1.312284e-13 14.215360 1.239986 1.340174 0.20000000 0.3510926
for (i in 2:10){df_clust_opt <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt)}
for (i in 1:10){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
for (i in 11:20){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
for (i in 21:30){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
#k4
df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt_final)
# Extract cluster assignments
cluster_assignments <- df_clust_opt_final@cluster
# Determine the number of clusters
num_clusters <- max(cluster_assignments)
# Loop through each cluster and print the jurisdictions in it
for (cluster_number in 1:num_clusters) {
cat("Jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Print the jurisdictions corresponding to these indices
print(jurisdiction[indices_in_cluster])
cat("\n") # Add a newline for readability
}
Jurisdictions in Cluster 1 :
[1] "Alabama" "Arkansas" "Hawaii" "Mississippi" "South Carolina"
Jurisdictions in Cluster 2 :
[1] "Alaska" "California" "Colorado" "Connecticut" "Florida" "Georgia" "Idaho" "Illinois"
[9] "Indiana" "Iowa" "Kansas" "Kentucky" "Louisiana" "Maryland" "Massachusetts" "Michigan"
[17] "Minnesota" "National" "Nevada" "New Hampshire" "New Jersey" "New Mexico" "New York" "North Carolina"
[25] "North Dakota" "Ohio" "Oregon" "Pennsylvania" "Rhode Island" "South Dakota" "Tennessee" "Utah"
[33] "Vermont" "Virginia" "Washington" "West Virginia" "Wisconsin" "Wyoming"
Jurisdictions in Cluster 3 :
[1] "Delaware" "Missouri" "Oklahoma"
Jurisdictions in Cluster 4 :
[1] "Arizona" "Maine" "Montana" "Nebraska" "Texas"
# Load necessary libraries
library(ggplot2)
library(reshape2)
# Loop through each cluster
for (cluster_number in 1:num_clusters) {
cat("Plotting jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Get the names of the jurisdictions in this cluster
jurisdictions_in_cluster <- jurisdiction[indices_in_cluster]
# Filter the gap_list_year data frame for these jurisdictions
cluster_data <- df[df$Jurisdiction %in% jurisdictions_in_cluster, ]
# Convert the data to long format for ggplot
long_df <- melt(cluster_data, id.vars = "Jurisdiction", variable.name = "Year", value.name = "Value")
# Plot
p <- ggplot(long_df, aes(x = Year, y = Value, group = Jurisdiction, color = Jurisdiction)) +
geom_line() +
labs(title = paste("Cluster", cluster_number), x = "Year", y = "Gap") +
theme(legend.position = "right")
print(p)
ggsave(paste("cluster_", cluster_number, ".png", sep=""), plot = p)
}
Plotting jurisdictions in Cluster 1 :
Plotting jurisdictions in Cluster 2 :
Plotting jurisdictions in Cluster 3 :
Plotting jurisdictions in Cluster 4 :